


** Merge together CEPR's CPS ORG Extracts **

use "${path}\Raw Data\CPS MORG\cepr_org_1979\cepr_org_1979.dta", clear

forvalues y = 1980/2019 {
append using "${path}\Raw Data\CPS MORG\cepr_org_`y'\cepr_org_`y'.dta", force
}

save "${path}\Clean Data\CPS.dta", replace


** Merge in occ1990 crosswalks that map each year's Census codes to David Dorn's occupation variable **

merge m:1 occ70 using "${path}\Clean Data\Crosswalks\occ1970_occ1990dd.dta", nogenerate
rename occ1990dd occ
merge m:1 occ80 using "${path}\Clean Data\Crosswalks\occ1980_occ1990dd.dta", nogenerate 
replace occ = occ1990dd if occ == .
replace occ = 22 if occ80 == 2 | occ80 == 21
drop occ1990dd
rename occ03 occ00
replace occ00 = occ00/10 if occ00 != .

merge m:1 occ00 using "${path}\Clean Data\Crosswalks\occ2000_occ1990dd.dta", nogenerate

replace occ = occ1990dd if occ == .
drop occ1990dd
rename occ occ1990dd

** Merge in Occupation task content data **

merge m:1 occ1990dd using "${path}\Clean Data\Crosswalks\occ1990dd_task_alm.dta", nogenerate
rename occ1990dd occ

** Keep relevant sample ** 

keep if year < 2008 /* Keep only years for which I have Romer and Romer shock var */
keep if age > 24 & age < 66 /* Drop non-working-age */
drop if lfstat == 3 /* Drop out of labor force */
drop if selfemp == 1 /* Drop the self-employed */
drop if pubsect == 1 /* Drop public sector */ 
drop if occ >= 473 & occ <= 498 /* Drop farm workers */

*replace rw = 0 if lfstat == 2
replace hourslw = uhourse if hourslw == . & rw != . /* Replace hours with usual hours if hours last week missing */

gen emp = 0
gen unemp = 0
gen total = 1

replace emp = 1 if lfstat == 1
replace unemp = 1 if lfstat == 2

***********************************
* Generate Uniform Industry Groups
***********************************

gen ind = ind_m03 

* Agriculture
replace ind = 1 if ind_nber == 1 | ind_nber == 2 | ind_nber == 46

* Mining
replace ind = 2 if ind_nber == 3
drop if ind == 2

* Construction
replace ind = 3 if ind_nber == 4

* Manufacturing
replace ind = 4 if ind_nber >= 5 & ind_nber <= 28

* Wholesale and Retail Trade
replace ind = 5 if ind_nber == 32 | ind_nber == 33

* Transportation
replace ind = 6 if ind_nber == 29

* Information
replace ind = 7 if ind_nber == 30

* Finance 
replace ind = 8 if ind_nber == 34 | ind_nber == 35

* Services
replace ind = 9 if ind_nber == 31 | ind_nber == 37 | ind_nber == 45 

** These two have to be broken up
* Education 
replace ind = 10 if ind_nber ==  43

* Health and Social Services
replace ind = 14 if ind_nber == 41 | ind_nber == 42 | ind_2d == 42 | ind_2d == 41 | ind_2d == 43 | ind_nber == 44

* Leisure/Hospitality 
replace ind = 11 if ind_nber == 40 

* Other
replace ind = 12 if ind_nber == 39 | ind_nber == 38 | ind_nber == 36

* Public Admin
replace ind = 13 if ind_nber == 52

* Low-Skill Health
replace ind = 15 if ind == 14 & educ < 3

drop _merge
merge m:1 ind using "${path}\Clean Data\Responsive Industries.dta", nogenerate

** Pick out industries in which capital is highly responsive **
gen cap = 1
replace cap = 2 if Top == 1 /* High response industries */
*replace cap = 2 if (occ >= 318 & occ <= 344) | occ == 308 | occ == 313 | occ == 379 | occ == 386 | occ == 389
replace cap = 2 if occ == 389

*************************************
* Create 2 Broad Occupation Groups
*************************************
drop OCC
gen OCC = 1

replace task_abstract = task_abstract + 1
replace task_manual = task_manual + 1

gen n = 1
gen RTI = log(task_routine) - log(task_abstract) - log(task_manual)
sort n
by n: egen RTIp66 = pctile(RTI), p(66) /* Top third most routine, following Autor et al. */

replace OCC = 2 if RTI > RTIp66

*************************************
* Create 3 Broad Occupation Groups
*************************************

gen ATI = log(task_abstract) - log(task_routine) - log(task_manual)
sort n
by n: egen ATIp66 = pctile(ATI), p(66) /* Top third most abstract */

replace OCC = 3 if ATI > ATIp66 & RTI <= RTIp66

* Frequency weights for summary statistics
gen forgwgt = int(orgwgt)

rename rw RealWage
rename hourslw Hours
gen WeeklyLaborIncome = RealWage*Hours

gen OCC4 = .
replace OCC4 = 1 if cap == 1 & OCC == 1
replace OCC4 = 2 if cap == 1 & OCC == 2
replace OCC4 = 3 if cap == 1 & OCC == 3
replace OCC4 = 4 if cap == 2 & OCC == 1
replace OCC4 = 5 if cap == 2 & OCC == 2
replace OCC4 = 6 if cap == 2 & OCC == 3

gen college = 0
replace college = 1 if educ >= 4

gen OCC5 = .

replace OCC5 = 1 if college == 1 & OCC == 1
replace OCC5 = 2 if college == 1 & OCC == 2
replace OCC5 = 3 if college == 1 & OCC == 3
replace OCC5 = 4 if college == 0 & OCC == 1
replace OCC5 = 5 if college == 0 & OCC == 2
replace OCC5 = 6 if college == 0 & OCC == 3


egen count = sum(orgwgt), by(occ)

drop count
gen count = 1

replace Hours = . if Hours > 100

drop if (year == 2003 & (month == 1 | month == 2 | month == 3)) | ( year == 2002 & (month == 10 | month == 11 | month == 12))


save "${path}\Clean Data\CPS Clean.dta", replace

*************************************************
******************* Monthly *********************
*************************************************


collapse (sum) WeeklyLaborIncome emp unemp (mean) Hours RealWage [fw=forgwgt], by(OCC year month)

bysort year month: egen UnEmp = total(unemp)

save "${path}\Clean Data\CPS Clean 1.dta", replace

clear

import excel "${path}\Raw Data\FRED\MacroVars.xls", sheet("Sheet1") firstrow clear


merge 1:m year month using "${path}\Clean Data\CPS Clean 1.dta"
keep if _merge == 3
drop _merge


sort OCC year month
gen time = _n

gen date = ym(year, month)
merge m:1 date using "${path}\Clean Data\Monetary Shocks\RRShocks_M.dta", nogenerate

save "${path}\Clean Data\CPS Final.dta", replace

use "${path}\Clean Data\CPS Clean.dta", clear


collapse (sum) WeeklyLaborIncome emp unemp (mean) Hours RealWage [fw=forgwgt], by(OCC4 year month)

bysort year month: egen UnEmp = total(unemp)

save "${path}\Clean Data\CPS Clean 1.dta", replace

clear

import excel "${path}\Raw Data\FRED\MacroVars.xls", sheet("Sheet1") firstrow clear


merge 1:m year month using "${path}\Clean Data\CPS Clean 1.dta"
keep if _merge == 3
drop _merge


sort OCC year month
gen time = _n

gen date = ym(year, month)
merge m:1 date using "${path}\Clean Data\Monetary Shocks\RRShocks_M.dta", nogenerate

save "${path}\Clean Data\CPS Final Ind.dta", replace

**************************************************
* Summary Stats
**************************************************/

use "${path}\Clean Data\CPS Clean.dta", clear
*
putexcel set "${path}\Results\Summary Statistics\CPS Summary", modify sheet(Sheet2)

* Median Real Wage
tabstat RealWage [w=orgwgt] if year == 1980 & OCC == 3, stat(mean sd) save
mat A = r(StatTotal)
putexcel C10 = matrix(A)

tabstat RealWage [w=orgwgt] if year == 1980 & OCC == 2, stat(mean sd) save
mat A = r(StatTotal)
putexcel D10 = matrix(A)

tabstat RealWage [w=orgwgt] if year == 1980 & OCC == 1, stat(mean sd) save
mat A = r(StatTotal)
putexcel E10 = matrix(A)

tabstat RealWage [w=orgwgt] if year == 2007 & OCC == 3, stat(mean sd) save
mat A = r(StatTotal)
putexcel F10 = matrix(A)

tabstat RealWage [w=orgwgt] if year == 2007 & OCC == 2, stat(mean sd) save
mat A = r(StatTotal)
putexcel G10 = matrix(A)

tabstat RealWage [w=orgwgt] if year == 2007 & OCC == 1, stat(mean sd) save
mat A = r(StatTotal)
putexcel H10 = matrix(A)

*Number of Observations
tabstat count if year == 1980 & OCC == 3, stat(sum) save
mat A = r(StatTotal)
putexcel C22 = matrix(A)

tabstat count if year == 1980 & OCC == 2, stat(sum) save
mat A = r(StatTotal)
putexcel D22 = matrix(A)

tabstat count if year == 1980 & OCC == 1, stat(sum) save
mat A = r(StatTotal)
putexcel E22 = matrix(A)

tabstat count if year == 2007 & OCC == 3, stat(sum) save
mat A = r(StatTotal)
putexcel F22 = matrix(A)

tabstat count if year == 2007 & OCC == 2, stat(sum) save
mat A = r(StatTotal)
putexcel G22 = matrix(A)

tabstat count if year == 2007 & OCC == 1, stat(sum) save
mat A = r(StatTotal)
putexcel H22 = matrix(A)


local x = 0
foreach var in age college female {
   
   local x = `x' + 1
   
   local y = 10 + `x'*3
   
tabstat `var' [w=orgwgt] if year == 1980 & OCC == 3, stat(mean sd) save
mat A = r(StatTotal)
putexcel C`y' = matrix(A)

tabstat `var' [w=orgwgt] if year == 1980 & OCC == 2, stat(mean sd) save
mat A = r(StatTotal)
putexcel D`y' = matrix(A)

tabstat `var' [w=orgwgt] if year == 1980 & OCC == 1, stat(mean sd) save
mat A = r(StatTotal)
putexcel E`y' = matrix(A)

tabstat `var' [w=orgwgt] if year == 2007 & OCC == 3, stat(mean sd) save
mat A = r(StatTotal)
putexcel F`y' = matrix(A)

tabstat `var' [w=orgwgt] if year == 2007 & OCC == 2, stat(mean sd) save
mat A = r(StatTotal)
putexcel G`y' = matrix(A)

tabstat `var' [w=orgwgt] if year == 2007 & OCC == 1, stat(mean sd) save
mat A = r(StatTotal)
putexcel H`y' = matrix(A)

	}



